# 1) Data preparation ####

##### 1.1) Loading data ####

# Read the four raw SPSS files. In every file the column `nomem_encr` is
# renamed to `id`, the column the tables share when they are joined below.

# Conjoint experiment (reshaped to long format in section 1.2).
od <- "nonsharable_data/L_Concepties_experiment_1.0p.sav" %>%
  haven::read_spss() %>%
  rename(id = nomem_encr)

# "Politics and values" questionnaire (source of the `cv23o...` items).
ri <- "nonsharable_data/Politics_and_values_w15.sav" %>%
  haven::read_spss() %>%
  rename(id = nomem_encr)

# Populism items `pop1` ... `pop6`.
pop <- "nonsharable_data/popbarow13-12-11.sav" %>%
  haven::read_spss() %>%
  rename(id = nomem_encr)

# Respondent background variables (origin, gender, age, education).
origin <- "nonsharable_data/avars_202302_EN_1.0p.sav" %>%
  haven::read_spss() %>%
  rename(id = nomem_encr)

##### 1.2) Cleaning experiment ####

# Reshape one per-round variable of the experiment data from wide to long.
#
# The experiment file stores every variable once per round, in five columns
# named `<prefix>1` ... `<prefix>5` (e.g. `age1` ... `age5`, `Q1_v1` ...
# `Q1_v5`).
#
# Arguments:
#   data       data frame holding `id` and the five `<prefix>1:5` columns.
#   prefix     common stem of the five column names, e.g. "age" or "Q1_v".
#   values_to  name of the value column in the result (defaults to `prefix`).
#
# Returns a tibble with one row per `id` and round and the columns `id`,
# `round` (the character that follows the prefix in the source column name,
# kept as a character string) and `<values_to>`.
pivot_rounds <- function(data, prefix, values_to = prefix) {
  round_cols <- paste0(prefix, 1:5)
  # The round number sits directly after the prefix; deriving the position
  # from the prefix replaces the hand-counted offsets used before.
  round_pos <- nchar(prefix) + 1

  data %>%
    select(id, all_of(round_cols)) %>%
    pivot_longer(all_of(round_cols), names_to = "var", values_to = values_to) %>%
    mutate(round = substr(var, round_pos, round_pos)) %>%
    select(id, round, all_of(values_to))
}

# Profile attributes. The table() calls are quick sanity checks on the raw
# codes before they are turned into factors.
age <- pivot_rounds(od, "age")
table(age$age)

gender <- pivot_rounds(od, "gender")
table(gender$gender)

country <- pivot_rounds(od, "country")
table(country$country)

integration <- pivot_rounds(od, "integration")
table(integration$integration)

# Cross-check: country code 1 against integration code 0.
table(country$country == 1, integration$integration == 0)

nationalism <- pivot_rounds(od, "nationalism")
table(nationalism$nationalism)

occupation <- pivot_rounds(od, "occupation")
table(occupation$occupation)

religion <- pivot_rounds(od, "religion")
table(religion$religion)

# Outcome questions: columns `Q<k>_v1` ... `Q<k>_v5`, stored as `Q<k>`.
q1 <- pivot_rounds(od, "Q1_v", values_to = "Q1")
table(q1$Q1)

q2 <- pivot_rounds(od, "Q2_v", values_to = "Q2")
table(q2$Q2)

q3 <- pivot_rounds(od, "Q3_v", values_to = "Q3")
table(q3$Q3)

q4 <- pivot_rounds(od, "Q4_v", values_to = "Q4")
table(q4$Q4)

q5 <- pivot_rounds(od, "Q5_v", values_to = "Q5")
table(q5$Q5)

# Combine the long attribute and outcome tables into one row per `id` and
# `round`, then convert the numeric attribute codes into labelled factors.
# The join key is stated explicitly so that a column name accidentally shared
# between two tables can never silently become part of the key.
df_exp <- Reduce(
  function(lhs, rhs) dplyr::left_join(lhs, rhs, by = c("id", "round")),
  list(age, gender, country, integration, nationalism, occupation, religion,
       q1, q2, q3, q4, q5)
) %>%
  mutate(
    age = factor(age,
                 levels = 1:3,
                 labels = c("25", "45", "65")),
    gender = factor(gender,
                    levels = c(1, 2),
                    labels = c("male", "female")),
    country = factor(country,
                     levels = 1:4,
                     labels = c("NL", "IE", "MA", "SY")),
    integration = factor(integration,
                         levels = c(0, 1, 2),
                         labels = c("Dutch", "Speaks Dutch well", "Speaks some Dutch")),
    # For the next three factors the codes are listed in non-numeric order;
    # the order given here is the resulting order of the factor levels.
    nationalism = factor(nationalism,
                         levels = c(2, 1),
                         labels = c("Does not celebrate", "Celebrates holidays")),
    occupation = factor(occupation,
                        levels = c(1, 2, 5, 6, 3, 4, 7, 8),
                        labels = c("Politician", "Banker", "University lecturer", "Mechanic",
                                   "Farmer", "Fruitpicker", "Cleaner", "Unemployed")),
    religion = factor(religion,
                      levels = c(3, 1, 2),
                      labels = c("Not religious", "Christian", "Muslim")),
    # Combined origin x language attribute, built from the factor labels
    # assigned above. Any combination not listed in `levels` (including
    # those involving a missing value) becomes NA.
    origin_language = factor(paste0(country, "-", integration),
                             levels = c("NL-Dutch", "IE-Speaks some Dutch", "IE-Speaks Dutch well",
                                        "MA-Speaks some Dutch", "MA-Speaks Dutch well",
                                        "SY-Speaks some Dutch", "SY-Speaks Dutch well"))
  )

##### 1.3) Cleaning respondent information ####

# Keep the respondent-level political items under descriptive names.
# No row-wise computation happens here, so the former `rowwise()` call was
# dropped; it only turned the result into a row-wise grouped data frame.
df_ri <- ri %>%
  select(
    id,
    left_right = cv23o101,
    vote_choice = cv23o308,
    political_interest = cv23o012,
    political_efficacy = cv23o050
  )

# Same for the respondent background variables.
df_origin <- origin %>%
  select(
    id,
    origin_resp = herkomstgroep,
    gender_resp = geslacht,
    age_resp = leeftijd,
    education_resp = oplcat
  )

##### 1.4) Cleaning populism data ####

# Populism score: per-respondent mean of the six items `pop1` ... `pop6`.
# Missing items are ignored; a row with all six items missing yields NaN.
df_pop <- pop %>%
  select(id, pop1:pop6) %>%
  rowwise() %>%
  mutate(pop = mean(c(pop1, pop2, pop3, pop4, pop5, pop6), na.rm = TRUE)) %>%
  # Drop the row-wise grouping again so it does not leak into later steps.
  ungroup() %>%
  select(id, pop)

##### 1.5) Combining data ####

# Attach the respondent-level tables to the long experiment data and derive
# the grouping variables. All three tables are joined on `id`; the key is
# stated explicitly instead of relying on a natural join.
# Note: `mean(pop, ...)` below is taken over the rows of the joined (long)
# data, i.e. over every id-by-round row.
df <- df_exp %>%
  dplyr::left_join(df_pop, by = "id") %>%
  dplyr::left_join(df_ri, by = "id") %>%
  dplyr::left_join(df_origin, by = "id") %>%
  mutate(
    # Dichotomised populism score: split at the mean, at 3 and at 4.
    pop_d = factor(ifelse(pop >= mean(pop, na.rm = TRUE), "High", "Low"),
                   levels = c("Low", "High")),
    pop_3 = factor(ifelse(pop >= 3, "High", "Low"),
                   levels = c("Low", "High")),
    pop_4 = factor(ifelse(pop >= 4, "High", "Low"),
                   levels = c("Low", "High")),
    # Same mean split as `pop_d`; a missing `pop` gives NA in both.
    pop_mean = factor(ifelse(pop >= mean(pop, na.rm = TRUE), "High",
                             ifelse(pop < mean(pop, na.rm = TRUE), "Low", NA)),
                      levels = c("Low", "High")),
    # Left-right self-placement in three groups: below 4 -> Left, 4 to below
    # 7 -> Centre, 7 to 10 -> Right, anything else -> NA.
    left_right_cat3 = factor(ifelse(left_right < 4, "Left",
                                    ifelse(left_right < 7, "Centre",
                                           ifelse(left_right %in% 7:10, "Right", NA))),
                             levels = c("Centre", "Left", "Right")),
    # Populism x ideology. Everyone who is not "High" on `pop_mean` falls
    # into "Non-Populist"; NA results when the first comparison that is not
    # FALSE evaluates to NA.
    pop_ideo = factor(ifelse(left_right_cat3 == "Left" & pop_mean == "High", "LWP",
                             ifelse(left_right_cat3 == "Right" & pop_mean == "High", "RWP",
                                    ifelse(left_right_cat3 == "Centre" & pop_mean == "High", "CP",
                                           "Non-Populist"))),
                      levels = c("Non-Populist", "LWP", "RWP", "CP")),
    # Full 3 x 2 cross of ideology and populism (suffix "nP" = not populist).
    pop_ideo6 = factor(ifelse(left_right_cat3 == "Left" & pop_mean == "High", "LWP",
                              ifelse(left_right_cat3 == "Right" & pop_mean == "High", "RWP",
                                     ifelse(left_right_cat3 == "Centre" & pop_mean == "High", "CP",
                                            ifelse(left_right_cat3 == "Left" & pop_mean == "Low", "LWnP",
                                                   ifelse(left_right_cat3 == "Right" & pop_mean == "Low", "RWnP",
                                                          ifelse(left_right_cat3 == "Centre" & pop_mean == "Low", "CnP", NA)))))),
                       levels = c("CnP", "CP", "LWnP", "LWP", "RWnP", "RWP")),
    # Vote flags based on the `vote_choice` codes (see the codebook for the
    # parties behind the codes - TODO confirm the lists).
    # NOTE(review): `%in%` never returns NA, so a missing `vote_choice` is
    # coded "No" in all three flags.
    pop_vote = factor(ifelse(vote_choice %in% c(3, 7, 14, 18, 19), "Yes", "No")),
    Lpop_vote = ifelse(vote_choice %in% 7, "Yes", "No"),
    Rpop_vote = ifelse(vote_choice %in% c(3, 14, 18, 19), "Yes", "No"),
    # NA for everyone flagged "No" on both Lpop_vote and Rpop_vote.
    pop_vote2 = factor(ifelse(Lpop_vote == "Yes", "LWP",
                              ifelse(Rpop_vote == "Yes", "RWP", NA))),
    # Respondent background variables as labelled factors.
    origin_resp = factor(origin_resp,
                         levels = c(0, 101, 102, 201, 202),
                         labels = c("Dutch", "First generation western",
                                    "First generation non-western",
                                    "Second generation western",
                                    "Second generation non-western")),
    gender_resp = factor(gender_resp,
                         levels = c(1, 2, 3),
                         labels = c("Male", "Female", "Other")),
    education_resp = factor(education_resp,
                            levels = 1:9,
                            labels = c("Primary school", "Intermediate secondary education",
                                       "Higher secondary education", "Intermediate vocational education",
                                       "Higher vocational training", "University", "Other",
                                       "Not (yet) completed any education",
                                       "Not yet started any education"))
  )

##### 1.6) Populism Wave 13 data ####

# Wave 13 populism items; `nomem_encr` is renamed to `id` for joining.
popW13 <- haven::read_spss("nonsharable_data/L_populisme_wave13_1.0p.sav") %>%
  rename(id = nomem_encr)

# Wave 13 populism score: per-respondent mean of the six items `v1a` ... `v2c`.
# Missing items are ignored; a row with all six items missing yields NaN.
df_popW13 <- popW13 %>%
  select(id, v1a:v2c) %>%
  rowwise() %>%
  mutate(popw13 = mean(c(v1a, v1b, v1c, v2a, v2b, v2c), na.rm = TRUE)) %>%
  # Drop the row-wise grouping again so it does not leak into later steps.
  ungroup() %>%
  select(id, popw13)

# Wave 13 counterparts of pop_d, pop_3, pop_4, pop_mean and pop_ideo.
# Fixes relative to the previous version:
#   * pop_dw13 was split at mean(pop) instead of mean(popw13);
#   * the "Low" branch of pop_meanw13 tested `pop` instead of `popw13`, so
#     rows were set to NA or "Low" based on the wrong score.
df <- dplyr::left_join(df, df_popW13, by = "id") %>%
  mutate(
    pop_dw13 = factor(ifelse(popw13 >= mean(popw13, na.rm = TRUE), "High", "Low"),
                      levels = c("Low", "High")),
    pop_3w13 = factor(ifelse(popw13 >= 3, "High", "Low"),
                      levels = c("Low", "High")),
    pop_4w13 = factor(ifelse(popw13 >= 4, "High", "Low"),
                      levels = c("Low", "High")),
    pop_meanw13 = factor(ifelse(popw13 >= mean(popw13, na.rm = TRUE), "High",
                                ifelse(popw13 < mean(popw13, na.rm = TRUE), "Low", NA)),
                         levels = c("Low", "High")),
    # Ideology x wave 13 populism; everyone who is not "High" on
    # `pop_meanw13` falls into "Non-Populist".
    pop_ideow13 = factor(ifelse(left_right_cat3 == "Left" & pop_meanw13 == "High", "LWP",
                                ifelse(left_right_cat3 == "Right" & pop_meanw13 == "High", "RWP",
                                       ifelse(left_right_cat3 == "Centre" & pop_meanw13 == "High", "CP",
                                              "Non-Populist"))),
                         levels = c("Non-Populist", "LWP", "RWP", "CP"))
  )

##### 1.7) Anti-Immigration data ####

# Same "Politics and values" file that is read into `ri` in section 1.1; it
# is read again here so this section can be run on its own. The stray double
# slash in the path was removed.
anti_immigration <- haven::read_spss("nonsharable_data/Politics_and_values_w15.sav") %>%
  rename(id = nomem_encr)

# Keep only the immigration item. No row-wise computation is needed, so the
# former `rowwise()` call was dropped.
df_anti_immigration <- anti_immigration %>%
  select(id, anti_immigration = cv23o104)

df <- dplyr::left_join(df, df_anti_immigration, by = "id") %>%
  mutate(
    # Three groups: codes 1-2 -> Pro-migration, 3 -> Centre,
    # 4-5 -> Anti-migration, anything else -> NA.
    anti_immigration_cat3 = factor(ifelse(anti_immigration %in% 1:2, "Pro-migration",
                                          ifelse(anti_immigration == 3, "Centre",
                                                 ifelse(anti_immigration %in% 4:5, "Anti-migration", NA))),
                                   levels = c("Centre", "Pro-migration", "Anti-migration")),
    # Populism x migration attitude: PMP = pro-migration populist,
    # AMP = anti-migration populist, CMP = centre-migration populist.
    # Everyone who is not "High" on `pop_mean` falls into "Non-Populist".
    pop_ideo_imm = factor(ifelse(anti_immigration_cat3 == "Pro-migration" & pop_mean == "High", "PMP",
                                 ifelse(anti_immigration_cat3 == "Anti-migration" & pop_mean == "High", "AMP",
                                        ifelse(anti_immigration_cat3 == "Centre" & pop_mean == "High", "CMP",
                                               "Non-Populist"))),
                          levels = c("Non-Populist", "PMP", "AMP", "CMP"))
  )

##### 1.8) Saving Data ####

# Write the combined, cleaned data set to disk for the analysis scripts.
saveRDS(object = df, file = "final_data/data_clean.RDS")
